# -*- coding: utf-8 -*-#
#-------------------------------------------------------------------------------
# 建立者:        潘炎珍  
# Name:         test03
# Description:  Parse an HTML fragment with lxml.etree and print the repaired markup
# Author:       98745
# Date:         2019/4/24
#-------------------------------------------------------------------------------

from lxml import etree

# Sample HTML fragment used as parser input. The last <li> has no closing
# tag, so the markup is intentionally malformed.
text = '''
<div>
    <ul>
         <li class="item-0"><a href="link1.html">第一个</a></li>
         <li class="item-1"><a href="link2.html">second item</a></li>
         <li class="item-0"><a href="link5.html">a属性</a>
     </ul>
 </div>
'''

# Build an element tree from the fragment; this is the object that XPath
# queries would be run against.
html = etree.HTML(text)

# Serialize the parsed tree back to markup, encoded as UTF-8.
result = etree.tostring(html, encoding='utf-8')

# Show the parsed object's type, the serialized value's type, and the
# serialized markup decoded to text, one item per line.
print(type(html), type(result), result.decode('utf-8'), sep='\n')

# Sample output: etree repairs the HTML (wraps it in <html>/<body> and closes the dangling <li>)
# <class 'lxml.etree._Element'>
# <class 'bytes'>
# <html><body><div>
#     <ul>
#          <li class="item-0"><a href="link1.html">第一个</a></li>
#          <li class="item-1"><a href="link2.html">second item</a></li>
#          <li class="item-0"><a href="link5.html">a属性</a>
#      </li></ul>
#  </div>
# </body></html>